#! /home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import datetime as d
import pandas as pd
import numpy as np
from collections import Counter
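'''
How long each user has been active in the MOOC: the number of whole days
between the user's first and last recorded event timestamps, plus one.
'''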
def parse_args():
    """Read the three positional command-line arguments.

    When the script is started without any argument, ``-h`` is appended to
    ``sys.argv`` so that argparse prints the usage text and exits.

    Returns:
        dict with the keys ``'user_course_time_path'``, ``'event'`` and
        ``'output'`` mapped to the strings given on the command line.
    """
    invoked_without_arguments = len(sys.argv) == 1
    if invoked_without_arguments:
        sys.argv.append('-h')

    cli = argparse.ArgumentParser()
    for positional in ('user_course_time_path', 'event', 'output'):
        cli.add_argument(positional)

    namespace = cli.parse_args()
    return vars(namespace)

# Resolve the command-line configuration once, at import/run time.
args = parse_args()

# Unpack the three positional values into the module-level names used below.
user_course_time_path, event, output = (
    args['user_course_time_path'],
    args['event'],
    args['output'],
)

# Load the input table from the CSV file given as the first argument.
data = pd.read_csv(user_course_time_path)

# Layout of every whitespace-separated timestamp token in an event record.
time_format = "%Y-%m-%dT%H:%M:%S"


def parse_rcd(rcd):
    """Parse a whitespace-separated record of timestamps into datetimes.

    Args:
        rcd: The raw cell value. Expected to be a string of tokens in
            ``time_format`` separated by whitespace; falsy values and
            non-string values (anything without a ``split`` method, such as
            the float NaN pandas produces for a blank CSV field) are treated
            as "no events".

    Returns:
        A list of ``datetime.datetime`` objects, one per token, in the order
        they appear in ``rcd``. The result is all-or-nothing: if any single
        token does not match ``time_format`` the whole record yields ``[]``.

    Only the expected failure modes are handled; unexpected errors (and
    KeyboardInterrupt/SystemExit) propagate instead of being silently
    turned into an empty result.
    """
    if not rcd:
        return []
    try:
        tokens = rcd.split()
    except AttributeError:
        # Not a string-like value (e.g. NaN for a missing cell): no events.
        return []
    try:
        return [d.datetime.strptime(token, time_format) for token in tokens]
    except (TypeError, ValueError):
        # A token is malformed (or not text); discard the whole record.
        return []

# Parse the selected event column into one list of datetimes per input row.
# Bracket indexing (rather than attribute access) is used so that a column
# whose name collides with a DataFrame attribute, or is not a valid Python
# identifier, still resolves to the column.
event_date = data[event].map(parse_rcd)

# Pair every row's username with that row's parsed timestamps.
train = pd.DataFrame()
train['username'] = data['username']
train['date'] = event_date

# The same username may occur on several rows; pool all of a user's
# timestamps into a single list.
user_event_dates = {}
for username, dates in zip(train['username'], train['date']):
    user_event_dates.setdefault(username, []).extend(dates)

# output
usernames = []
n_days = []
for username, dates in user_event_dates.items():
    if dates:
        # Whole days between the earliest and latest event, counted
        # inclusively (a single event therefore yields 1).
        ndays = (max(dates) - min(dates)).days + 1
    else:
        # No parseable timestamp for this user (blank or malformed cells):
        # report 0 days instead of failing on an empty list.
        ndays = 0
    usernames.append(username)
    n_days.append(ndays)

out_data = pd.DataFrame()
out_data['username'] = usernames
out_data['ndays'] = n_days

# One row per user, written without the DataFrame index column.
out_data.to_csv(output, index=False)
